Celestin Apprentice 5

home *** CD-ROM | disk | FTP | other *** search

/ Celestin Apprentice 5 / Apprentice-Release5.iso / Demos / Evatac Software / Preditor 3.0 / Tools / Language Module Builder / Sources / HtmlParse.c < prev next >

Wrap

Text File | 1996-02-04 | 15KB | 758 lines

/************************************************************

    HtmlParse.c
    C Source to Preditor 3

    Language Module Code for the "HTML" language

    © Copyright Evatac Software 1988-1996
    All rights reserved

************************************************************/

#include "HtmlParse.h"
#include <SetupA4.h>
#include <MixedMode.h>
#include <Ctype.h>

#ifndef THINKC
#include <A4Stuff.h>
#else
#define SetCurrentA4()    0; RememberA4()
#define SetA4(x)        SetUpA4()
#endif

#ifdef powerc
ProcInfoType __procinfo = LanguageUPPInfo;
#endif

// Remove the comment on the following line to include hrefs in reference popup
// #define INCLUDE_HREFS 1

/* Scanner state shared by every routine in this module. */
static languageGlobals globals;
static ExternalCallbackBlock *callbacks;

/*
 * inMarkup   - true between a '<' and the matching '>'
 * inNegation - true when the current tag started with "</"
 * inHeader   - true while the text being scanned follows an opening <hN> tag
 */
static Boolean inMarkup, inNegation, inHeader = false;


/*
 * _languageIsBlank
 *
 * True for the characters this module treats as white space.  Backspace
 * ('\b') is deliberately part of the set, exactly as in the tokenizer's
 * switch statement.
 */
static Boolean _languageIsBlank( int c )
{
    return (c == ' ' || c == '\t' || c == '\v' || c == '\n' ||
            c == '\r' || c == '\f' || c == '\b');
}


/* * * * *  HTML LANGUAGE INDENT HANDLER * * * * * * */

/*
 * _languageConvertToTabs
 *
 * Fill `text` with an indentation `length` columns wide, written as
 * (length / hardTab) tab characters followed by (length % hardTab) spaces.
 *
 *   text     destination buffer; the caller guarantees room for `length`
 *            characters (the result is never longer than `length`)
 *   length   indentation width in columns; negative values are treated as 0
 *   hardTab  columns per tab; when <= 0 only spaces are produced, which
 *            avoids a division by zero
 *
 * Returns the number of characters written.  The buffer is NOT terminated.
 */
static long _languageConvertToTabs( Char *text, long length, long hardTab )
{
    long tabs, spaces, written;

    if (length < 0)
        length = 0;

    if (hardTab > 0) {
        tabs = length / hardTab;
        spaces = length % hardTab;
    } else {
        tabs = 0;
        spaces = length;
    }
    written = tabs + spaces;

    while (tabs-- > 0)
        *(text++) = 9;          /* horizontal tab */
    while (spaces-- > 0)
        *(text++) = ' ';

    return(written);
}

/*
 * _languageHandleIndent
 *
 * Indent the selected lines according to the buffer indentation settings.
 *
 * For every line in the selection (line 1 is skipped because it has no
 * predecessor) the line's own leading white space is selected and then
 * replaced with the leading value reported for the nearest preceding
 * non-blank line.  When the loop is done the selection is collapsed to
 * just after the indentation of the first line that was re-indented.
 *
 *   extData  unused
 */
static void _languageHandleIndent( void *extData )
{
    long    anchor, end, pos, length;
    long    lineStart;
    long    count, back, newPos = -1;
    long    lineNumber, endLineNumber, leading;
    short   spacesPerTab, hardTab;
    Char    text[256];

    extGetSelection(callbacks, &anchor, &end);
    if (anchor > end) {
        pos = anchor; anchor = end; end = pos;  /* Swap */
    }

    lineNumber = extLineFromPosition(callbacks, anchor);
    endLineNumber = extLineFromPosition(callbacks, end);

    /*
     * Indent each line in the selection
     */
    for ( ; lineNumber <= endLineNumber; lineNumber++) {
        if (lineNumber <= 1)
            continue;

        leading = extGetLeading(callbacks, lineNumber, &length, &spacesPerTab, &hardTab);
        lineStart = extLineToPosition(callbacks, lineNumber);

        /*
         * Select the leading spaces/tabs so the insert below replaces them
         */
        extSetSelection(callbacks, lineStart, lineStart + length);

        /*
         * Scan back previous lines for a line that we can relate to
         */
        for (back = 1; lineNumber - back >= 1; back++) {
            leading = extGetLeading(callbacks, lineNumber - back, &length, &spacesPerTab, &hardTab);
            end = extLineEnd(callbacks, lineNumber - back);
            pos = extLineToPosition(callbacks, lineNumber - back);

            /* Skip blank lines (nothing but leading white space) */
            if (length == (end - pos))
                continue;

            /*
             * Perform Indention Smarts (Still under development).
             * The scan is opened and closed without reading anything; the
             * calls are kept so the callback sequence seen by the host is
             * unchanged.
             */
            extScanContents(callbacks, pos + length);
            extDoneScan(callbacks);

            /*
             * Indent the line.  The width is clamped so the generated
             * white space can never overrun the local buffer.
             */
            if (leading > (long) sizeof(text))
                leading = (long) sizeof(text);
            count = _languageConvertToTabs(text, leading, hardTab);
            extInsert(callbacks, text, count);
            if (newPos == -1)
                newPos = lineStart + count;
            break;
        }
    }

    if (newPos >= 0)
        extSetSelection(callbacks, newPos, newPos);
}


/* * * * *  HTML LANGUAGE PARSER * * * * * * */

/*
 * _languageBuildString
 *
 * Build up a quoted attribute value, "foo" or 'foo'.
 *
 *   token  receives the text in token->string (length byte at [0], text
 *          from [1], NUL after the last character) and a type of
 *          kSymbolStringLiteral for a double quote or kSymbolCharConstant
 *          for a single quote
 *   c      the opening quote character, already consumed
 *
 * A backslash escapes the following character.  Text beyond
 * kTokenStringSize characters is consumed but not stored.
 */
static void _languageBuildString( languageToken *token, int c )
{
    Int32   index = 1, size = kTokenStringSize;
    int     quote = c;

    token->string[1] = c;
    token->type = (quote == '\"' ? kSymbolStringLiteral : kSymbolCharConstant);

    while ((c = languageGetChar(&globals, callbacks)) != -1) {
        if (index < size)
            token->string[++index] = c;
        if (c == quote)
            break;
        if (c == '\\') {
            /* Escaped character: store it without testing for the quote */
            c = languageGetChar(&globals, callbacks);
            if (c != -1 && index < size)
                token->string[++index] = c;
        }
    }

    token->string[0] = index;       /* So string can be used as C or Pascal string */
    token->string[++index] = 0;
}

/*
 * _languageBuildWhiteSpace
 *
 * Consume a run of white space.
 *
 *   token  its type is set to kSymbolWhiteSpace
 *   c      ignored; the run is read from the input
 *
 * The first non-blank character is pushed back onto the input.
 */
static void _languageBuildWhiteSpace( languageToken *token, int c )
{
    token->type = kSymbolWhiteSpace;

    while ((c = languageGetChar(&globals, callbacks)) != -1) {
        if (!_languageIsBlank(c)) {
            languageUngetChar(&globals, c);
            return;
        }
    }
}

/*
 * _languageBuildContent
 *
 * Build up the text found between two pieces of markup.
 *
 *   token  receives type kSymbolContent, the text in token->string and
 *          its endLocation
 *   c      the first character of the content, already consumed
 *
 * Scanning stops at end of input or at the next '<', which is pushed back.
 * Text beyond kTokenStringSize characters is consumed but not stored.
 */
static void _languageBuildContent( languageToken *token, int c )
{
    Int32   index = 0, size = kTokenStringSize;

    token->type = kSymbolContent;
    token->string[++index] = c;

    while ((c = languageGetChar(&globals, callbacks)) != -1) {
        if (c == '<') {
            languageUngetChar(&globals, c);
            break;
        }
        if (index < size)
            token->string[++index] = c;
    }

    token->string[0] = index;       /* So string can be used as C or Pascal string */
    token->string[++index] = 0;
    token->endLocation = globals.position;
}

/*
 * _languageBuildComment
 *
 * Try to read an HTML comment, "<!-- ... -->".
 *
 * Called after '<' has been consumed and the next character is known to
 * be '!'.  Returns true when a comment was recognised: the token type is
 * kSymbolComment, globals.startLastComment holds the position just after
 * the '<', and the input has been consumed through the closing "-->" (or
 * to end of input when the comment is unterminated).
 *
 * Returns false when the text is some other "<!" declaration.  In that
 * case the '!' (and at most one '-') has been consumed, and the token and
 * globals.startLastComment are left untouched so the caller can treat the
 * '<' as ordinary markup.
 *
 *   c  ignored
 */
static Boolean _languageBuildComment( languageToken *token, int c )
{
    long    commentStart = globals.position;
    int     dashes;

    (void) languageGetChar(&globals, callbacks);    /* the '!' */

    /* Both dashes of the "--" opener are required */
    for (dashes = 0; dashes < 2; dashes++) {
        if (languagePeekChar(&globals, callbacks) != '-')
            return(false);
        (void) languageGetChar(&globals, callbacks);
    }

    token->type = kSymbolComment;
    globals.startLastComment = commentStart;

    /* The comment ends at the first '>' preceded by two or more dashes */
    dashes = 0;
    while ((c = languageGetChar(&globals, callbacks)) != -1) {
        if (c == '-')
            dashes++;
        else if (c == '>' && dashes >= 2)
            break;
        else
            dashes = 0;
    }
    return(true);
}

/*
 * _languageBuildNumber
 *
 * Consume a numeric constant using C lexical rules: hexadecimal (0x...),
 * octal (leading 0) or decimal with optional fraction and exponent,
 * followed by any run of the suffix letters l, u, f and h in either case.
 *
 *   token  its type is set to kSymbolIntConstant, or kSymbolFloatConstant
 *          when a fraction or exponent is present
 *   c      the first digit, already consumed
 *
 * The character that ends the number is pushed back onto the input.
 */
static void _languageBuildNumber( languageToken *token, int c )
{
    token->type = kSymbolIntConstant;

    if (c == '0') {
        c = languageGetChar(&globals, callbacks);
        if (c == 'x' || c == 'X') {
            /* hexadecimal */
            do {
                c = languageGetChar(&globals, callbacks);
            } while ((c >= '0' && c <= '9') ||
                     (c >= 'a' && c <= 'f') ||
                     (c >= 'A' && c <= 'F'));
        } else {
            /* octal */
            while (c >= '0' && c <= '7')
                c = languageGetChar(&globals, callbacks);
        }
    } else {
        /* decimal */
        do {
            c = languageGetChar(&globals, callbacks);
        } while (c >= '0' && c <= '9');

        if (c == '.') {
            token->type = kSymbolFloatConstant;
            do {
                c = languageGetChar(&globals, callbacks);
            } while (c >= '0' && c <= '9');
        }

        if (c == 'e' || c == 'E') {
            token->type = kSymbolFloatConstant;
            c = languageGetChar(&globals, callbacks);
            if (c == '-' || c == '+')
                c = languageGetChar(&globals, callbacks);
            while (c >= '0' && c <= '9')
                c = languageGetChar(&globals, callbacks);
        }
    }

    /* Type suffixes */
    while (c == 'l' || c == 'L' || c == 'u' || c == 'U' ||
           c == 'f' || c == 'F' || c == 'h' || c == 'H')
        c = languageGetChar(&globals, callbacks);

    if (c != -1)
        languageUngetChar(&globals, c);
}

/*
 * _languageBuildWord
 *
 * Build up a word (letters, digits and '_') and classify it.
 *
 *   token  receives the text in token->string and a type of
 *          kSymbolIdentifier, kSymbolReservedWord or kSymbolCustomWord
 *   c      the first character of the word, already consumed
 *
 * The character that ends the word is pushed back onto the input.  A word
 * longer than kTokenStringSize is truncated and its last stored character
 * is replaced by an ellipsis.  Keyword lookup is case-insensitive and is
 * only performed while inside markup.
 */
static void _languageBuildWord( languageToken *token, int c )
{
    Int32   index = 1, size = kTokenStringSize;
    Int32   len, k;
    Char    *word;
    Char    lowerStr[kTokenStringSize + 2];

    token->type = kSymbolIdentifier;
    token->string[1] = c;

    while ((c = languageGetChar(&globals, callbacks)) != -1) {
        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
            c == '_' || (c >= '0' && c <= '9')) {
            if (index < size)
                token->string[++index] = c;
            else
                token->string[index] = '\xC9';  /* ellipsis in Mac OS Roman */
        } else {
            languageUngetChar(&globals, c);
            break;
        }
    }

    len = index;
    token->string[0] = index;       /* So string can be used as C or Pascal string */
    token->string[++index] = 0;

    /*
     * lower case the string for lookup.  Only the text is converted; the
     * copy is driven by the known length rather than by searching for a
     * NUL, so the length byte is never mistaken for text or a terminator.
     */
    lowerStr[0] = token->string[0];
    for (k = 1; k <= len; k++)
        lowerStr[k] = tolower((unsigned char) token->string[k]);
    lowerStr[len + 1] = 0;

    if (!inMarkup)
        return;

    word = lowerStr + 1;

    /*
     * Since hashing into a large reserved word table takes time, the reserved
     * word table is not loaded for "function" scanning. We do our own
     * limited keyword check: "href" and the six heading tags h1..h6.
     */
    if (!languageHasTable(&globals)) {
        if (languageCStringCompare(word, (Char *) "href") == 0)
            token->type = kSymbolReservedWord;
        else if (word[0] == 'h' && word[1] >= '1' && word[1] <= '6' && word[2] == 0)
            token->type = kSymbolReservedWord;
        return;
    }

    if (languageTableLookup((&globals), word))
        token->type = kSymbolReservedWord;
    else if (languageCustomTableLookup((&globals), word))
        token->type = kSymbolCustomWord;
}

/*
 * _languageGetNextToken
 *
 * Read the next token from the input into globals.token.
 *
 * Returns a pointer to globals.token, or nil at end of input.  Because the
 * same structure is reused for every token, the previous token's contents
 * are lost as soon as this routine is called again.
 *
 * Outside markup everything up to the next '<' is returned as one
 * kSymbolContent token.  Inside markup the token type is the value set by
 * the matching _languageBuild... routine, or the character itself for
 * single-character tokens.  majorType is always reset to -1; the caller
 * decides what, if anything, to report.
 */
static languageToken *_languageGetNextToken(void)
{
    int             first, second;
    languageToken   *token = &globals.token;

    token->startLocation = globals.position;
    token->majorType = -1;

    if ((first = languageGetChar(&globals, callbacks)) == -1)
        return(nil);

    token->type = first;
    second = languagePeekChar(&globals, callbacks);

    if (!inMarkup && first != '<') {
        _languageBuildContent(token, first);
        return(token);
    }

    switch (first) {
        /* "strings" */
        /* 'character constants' */
        case '\"':
        case '\'':
            if (inMarkup)
                _languageBuildString(token, first);
            break;

        /* white space */
        case ' ':
        case '\t':
        case '\v':
        case '\n':
        case '\r':
        case '\f':
        case '\b':
            _languageBuildWhiteSpace(token, first);
            break;

        /* Markup language */
        case '<':
            /* "<!--" opens a comment; any other "<!" is a declaration tag */
            if (second == '!' && _languageBuildComment(token, first))
                break;
            inMarkup = true;
            inNegation = (second == '/');
            break;

        case '>':
            inMarkup = false;
            inNegation = false;
            globals.startLastComment = -1;
            break;

        /* monographs */
        case ';':
        case '(':
        case '#':
        case ')':
        case '[':
        case ']':
        case '}':
        case '{':
        case '~':
        case '*':
        case '%':
        case '\\':
        case ',':
        case '?':
        case '/':
            break;

        /* = */
        case '=':
            break;

        /* the rest */
        default:
            if (first >= '0' && first <= '9')
                _languageBuildNumber(token, first);
            else if ((first >= 'a' && first <= 'z') ||
                     (first >= 'A' && first <= 'Z') || first == '_')
                _languageBuildWord(token, first);
            /* Something weird, let the parser decide. */
            break;
    }

    token->endLocation = globals.position;
    return(token);
}

void languageMain( ExternalCallbackBlock *extCallbacks, WindowRef window, long options, void *extData );

/*
 * languageMain
 *
 * This is the main entrypoint to the CODE module of a language module.
 * The following operations are defined:
 *
 * kLanguageParse       Parse the source file, returning positions of all tokens
 *                      in the file.
 * kLanguageFunctions   Parse the source file, returning the position of just the
 *                      functions in the source file
 * kLanguageIncludes    Parse the source file, returning the #include files
 * kLanguageTemplate    Expand macro -- insert template
 * kLanguageIndent      Re-indent the selected lines
 * kLanguageElectric    Handle electric characters (i.e. }, {, ; )
 *
 * NOTE(review): the prototype above is named languageMain but the routine
 * is defined as main; presumably the code resource is entered at main.
 *
 *   extCallbacks  callback block supplied by the host editor
 *   window        unused here
 *   options       the operation to perform (see above)
 *   extData       passed through to the template and indent handlers
 */
void main( ExternalCallbackBlock *extCallbacks, WindowRef window, long options, void *extData )
{
    languageToken   *token;
    Int16           type;
    Char            *ptr;
    long            saved_a4;

    saved_a4 = SetCurrentA4();

    /*
     * The scanner state lives in statics, so start every invocation from
     * a clean slate; otherwise an unterminated tag or heading in one
     * buffer would leak into the next parse.
     */
    inMarkup = false;
    inNegation = false;
    inHeader = false;
    globals.startLastComment = -1;

    languageInit(&globals, extCallbacks, options);
    callbacks = extCallbacks;

    if (options == kLanguageTemplate) {
        languageDefaultHandler(&globals, callbacks, options, extData);
    } else if (options == kLanguageIndent) {
        _languageHandleIndent(extData);
    } else if (options <= kLanguageIncludes) {
        /*
         * Now parse the file, returning a series of valid return token types:
         *
         *  kReference
         *  kKeyword
         *  kComment
         *  kCustomKeyword
         *  kHeader
         */
        while ((token = _languageGetNextToken()) != nil) {
            type = token->type;

            if (type == kSymbolReservedWord) {
                token->majorType = kKeyword;
                ptr = token->string;    /* ptr[0] is the length, text starts at ptr[1] */

                /*
                 * href = "target": report the quoted target as a reference.
                 * The tokenizer stores the raw character as the type of an
                 * '=' token, so kSymbolEqual is presumably '='.
                 */
                if (ptr[0] == 4 &&
                    tolower((unsigned char) ptr[1]) == 'h' &&
                    tolower((unsigned char) ptr[2]) == 'r' &&
                    options == kLanguageParse && inMarkup) {
                    _languageBuildWhiteSpace(token, 0);

                    /* Now get the equal sign */
                    token = _languageGetNextToken();
                    if (token != nil && token->type == kSymbolEqual) {
                        _languageBuildWhiteSpace(token, 0);
                        token = _languageGetNextToken();
                        if (token != nil && token->type == kSymbolStringLiteral) {
                            token->majorType = kReference;
                            token->commentLocation = globals.startLastComment;
                            extTokenReturn(callbacks, token);
                            continue;
                        }
                    }

                    /* Input ended during the look-ahead: nothing left to report */
                    if (token == nil)
                        break;
                }

                if (options == kLanguageParse)
                    extTokenReturn(callbacks, token);

                /*
                 * A two character "h<digit>" tag (either case) opens a
                 * heading, or closes it when the tag began with "</".
                 */
                if (inMarkup && ptr[0] == 2 &&
                    tolower((unsigned char) ptr[1]) == 'h' &&
                    ptr[2] >= '0' && ptr[2] <= '9' &&
                    options != kLanguageIncludes) {
                    inHeader = !inNegation;
                }
                continue;
            } else if (type == kSymbolCustomWord) {
                token->majorType = kCustomKeyword;
            } else if (type == kSymbolComment) {
                token->majorType = kComment;
            } else if (type == kSymbolContent) {
                token->majorType = (inHeader ? kHeader : kContent);

                if (options != kLanguageParse && inHeader) {
                    int     remaining = token->string[0];
                    Char    *text = token->string + 1;

                    /* Make sure there is something in the header */
                    while (remaining-- > 0) {
                        if (!_languageIsBlank(*(text++))) {
                            token->commentLocation = globals.startLastComment;
                            extTokenReturn(callbacks, token);
                            break;
                        }
                    }
                    continue;
                }
            }

            /*
             * Only return a token if it's a interesting token, and
             * if we are doing a full parse
             */
            if (token->majorType >= 0 && options == kLanguageParse)
                extTokenReturn(callbacks, token);
        }
    }

    /*
     * Clean up after ourselves
     */
    languageDone(&globals, callbacks);

    SetA4(saved_a4);
}